2019-12-20
This is awesome! We would love to speak to you about creating content for the ATP's channels; drop us an email at socialmedia@.
# Download the weekly ATP rankings CSV (1973-2017) from datahub.io and save it
# under its own file name (relative path, i.e. in the working directory).
file_to_download <- "rankings_1973-2017.csv"
# The base URL has to be one unbroken string literal: a line break inside the
# quotes would embed a newline character in the requested URL.
# NOTE(review): the trailing "7.html" segment is kept from the original text;
# confirm that base URL + file name really resolves to the CSV.
url <- paste0(
  "https://datahub.io/sports-data/atp-world-tour-tennis-data/r/7.html",
  file_to_download
)
download.file(url, destfile = file_to_download)
# Download the player overview CSV from datahub.io and save it under its own
# file name (relative path, i.e. in the working directory).
file_to_download <- "player_overviews_unindexed_csv.csv"
# The base URL has to be one unbroken string literal: a line break inside the
# quotes would embed a newline character in the requested URL.
# NOTE(review): the trailing "8.html" segment is kept from the original text;
# confirm that base URL + file name really resolves to the CSV.
url <- paste0(
  "https://datahub.io/sports-data/atp-world-tour-tennis-data/r/8.html",
  file_to_download
)
download.file(url, destfile = file_to_download)
# Load the two downloaded CSV files; both paths are resolved via here::here().
atp_database_1 <- here::here("./rankings_1973-2017.csv") %>%
  import()
atp_database_2 <- here::here("./player_overviews_unindexed_csv.csv") %>%
  import()

# Full join on player_id: rows from either table are kept even when the other
# table has no matching player.
df <- atp_database_1 %>%
  full_join(atp_database_2, by = "player_id")
'data.frame': 2694539 obs. of 14 variables: $ week_title : chr "2017.11.20" "2017.11.20" "2017.11.20" "2017.11.20" ... $ week_year : int 2017 2017 2017 2017 2017 2017 2017 2017 2017 2017 ... $ week_month : int 11 11 11 11 11 11 11 11 11 11 ... $ week_day : int 20 20 20 20 20 20 20 20 20 20 ... $ rank_text : chr "1" "2" "3" "4" ... $ rank_number : int 1 2 3 4 5 6 7 8 9 10 ... $ move_positions : int NA NA 3 1 1 1 1 1 2 NA ... $ move_direction : chr "" "" "up" "down" ... $ player_age : int 31 36 26 20 24 29 26 25 32 26 ... $ ranking_points : int 10645 9605 5150 4610 4015 3805 3775 3165 3150 2615 ... $ tourneys_played: int 18 17 23 25 27 22 26 22 15 25 ... $ player_url : chr "/en/players/rafael-nadal/n409/overview" "/en/players/roger-federer/f324/overview" "/en/players/grigor-dimitrov/d875/overview" ... $ player_slug : chr "rafael-nadal" "roger-federer" ... $ player_id : chr "n409" "f324" "d875" "z355" ...
# Give the columns we keep shorter names, then drop the ones that are not
# needed (movement info, URLs, ids, duplicated slug, birth and imperial-unit
# fields).
df1 <- df %>%
  rename(
    player = player_slug.x,
    date = week_title,
    month = week_month,
    year = week_year,
    country = flag_code
  ) %>%
  select(
    -move_positions, -move_direction,
    -player_url.x, -player_url.y, -player_id, -player_slug.y,
    -residence, -birthdate, -birth_year, -birth_month, -birth_day,
    -turned_pro, -weight_lbs, -height_ft, -height_inches
  )
# Tidy the text columns. The assignments inside mutate() run in order, so each
# `player` step works on the result of the previous one.
df1 <- mutate(
  df1,
  # "2017.11.20" -> "2017-11-20"
  date = gsub(".", "-", date, fixed = TRUE),
  # Use underscores as the only word separator in player slugs.
  player = gsub("-", "_", player, fixed = TRUE),
  player = gsub("%20", "_", player, fixed = TRUE),
  # Shorten this one slug.
  player = gsub("juan_martin_del_potro", "del_potro", player, fixed = TRUE)
)
# Convert every column name to snake_case, then list the resulting names.
df2 <- df1 %>%
  janitor::clean_names(case = "snake")
names(df2)
#>  [1] "date"            "year"            "month"           "week_day"
#>  [5] "rank_text"       "rank_number"     "player_age"      "ranking_points"
#>  [9] "tourneys_played" "player"          "first_name"      "last_name"
#> [13] "country"         "birthplace"      "weight_kg"       "height_cm"
#> [17] "handedness"      "backhand"
# Build the 2010 top-15 table: the ranking columns of interest, restricted to
# year 2010 and rank_number below 16, sorted by date, plus a numeric `day`
# column. The result is a plain (ungrouped) data.frame.
#
# No grouping is used: neither the filter nor the per-row date arithmetic
# depends on groups. The stray duplicate `mutate(...) %>% as.data.frame()`
# statement that followed the pipeline has been removed, because it called
# mutate() without a data argument.
df3 <- df2 %>%
  select(date, year, month, country, player, rank_number, ranking_points) %>%
  filter(year == 2010, rank_number < 16) %>%
  arrange(date) %>%
  # Days elapsed since 2010-01-03, which is day 14612 counted from 1970-01-01.
  mutate(day = as.numeric(as.Date(date) - as.Date("2010-01-03"))) %>%
  as.data.frame()
# Mark five selected players with `flag`, and derive `player_col`: the player's
# own name for flagged rows and the placeholder "zzz" for all others.
df4 <- df3 %>%
  mutate(
    # `%in%` already returns TRUE/FALSE (never NA), so wrapping it in
    # ifelse(..., TRUE, FALSE) adds nothing.
    flag = player %in% c(
      "roger_federer", "rafael_nadal", "novak_djokovic",
      "andy_murray", "del_potro"
    ),
    player_col = if_else(flag, player, "zzz")
  )
| handedness | NN | percent |
|---|---|---|
| Left-Handed | 16 | 0.16 |
| Right-Handed | 84 | 0.84 |
| backhand | NN | percent |
|---|---|---|
| One-Handed Backhand | 21 | 0.21 |
| Two-Handed Backhand | 79 | 0.79 |
| date | altura_media | peso_medio |
|---|---|---|
| 2013-12-30 | 187.21 | 80.99 |
| 2014-12-29 | 186.99 | 80.80 |
| 2015-12-28 | 186.99 | 80.58 |
| 2016-12-26 | 186.80 | 80.08 |
| 2017-11-20 | 187.08 | 80.26 |
| player | birth_year | turned_pro | years_turned_pro |
|---|---|---|---|
| rafael_nadal | 1986 | 2001 | 15 |
| roger_federer | 1981 | 1998 | 17 |
| grigor_dimitrov | 1991 | 2008 | 17 |
| alexander_zverev | 1997 | 2013 | 16 |
| dominic_thiem | 1993 | 2011 | 18 |
| player | birth_year | turned_pro | years_turned_pro |
|---|---|---|---|
| marin_cilic | 1988 | 2005 | 17 |
| david_goffin | 1990 | 2009 | 19 |
| jack_sock | 1992 | 2011 | 19 |
| stan_wawrinka | 1985 | 2002 | 17 |
| pablo_carreno_busta | 1991 | 2009 | 18 |
# Replace the sports-style country codes in `country` with the corresponding
# ISO 3166-1 alpha-3 codes (presumably so they match a map/flag lookup that
# expects ISO codes; confirm against the consumer of this column).
df3 <- df3 %>%
  mutate(country = gsub("SUI", "CHE", country, fixed = TRUE)) %>%  # Switzerland
  mutate(country = gsub("BUL", "BGR", country, fixed = TRUE)) %>%  # Bulgaria
  mutate(country = gsub("GER", "DEU", country, fixed = TRUE)) %>%  # Germany
  mutate(country = gsub("CRO", "HRV", country, fixed = TRUE)) %>%  # Croatia
  # RSA is South Africa, whose ISO code is ZAF; mapping it to RUS would
  # relabel South African players as Russian.
  mutate(country = gsub("RSA", "ZAF", country, fixed = TRUE))
# For every player and every year after 2013, keep only the row with the
# latest date (the last row of each group once the data is sorted by date).
# local() keeps the intermediate objects out of the global environment.
df4 <- local({
  after_2013 <- filter(df3, year > 2013)
  chronological <- arrange(after_2013, date)
  per_player_year <- group_by(chronological, player, year)
  ungroup(slice(per_player_year, n()))
})